!pip install cufflinks
import scipy as sp
import plotly.plotly as py
import cufflinks as cf
import pandas as pd
import numpy as np
# Show which cufflinks version is installed.
print (cf.__version__)
import plotly
# Initialise Plotly's offline notebook mode before any plotly.offline.iplot call below.
plotly.offline.init_notebook_mode() # run at the start of every ipython notebook
#tutorial from
#https://plot.ly/python/ipython-notebook-tutorial/
import plotly.plotly as py
import plotly.figure_factory as ff
import pandas as pd
# Load the school earnings dataset from the plotly datasets repository.
df = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/school_earnings.csv")

# Build the table figure first, then display it; `table` must be assigned
# before it is referenced.
table = ff.create_table(df)
plotly.offline.iplot(table, filename='jupyter/table1')

#Use dataframe.column_title to index the dataframe:
schools = df.School
schools[0]
Most pandas functions also work on an entire dataframe. For example, calling std() calculates the standard deviation for each column.
# Restrict the computation to numeric columns: the `School` column holds names,
# and pandas >= 2.0 raises TypeError instead of silently skipping such columns.
df.std(numeric_only=True)
You can use Plotly's Python API to plot inside your Jupyter Notebook by calling plotly.plotly.iplot() or plotly.offline.iplot() if working offline. Plotting in the notebook gives you the advantage of keeping your data analysis and plots in one place. Calling the plot with iplot automatically generates an interactive version of the plot inside the Notebook in an iframe.
import plotly.plotly as py
from plotly.graph_objs import *

# A single bar trace: the `Gap` column plotted against each school.
gap_bars = Bar(x=df['School'], y=df['Gap'])
data = [gap_bars]
plotly.offline.iplot(data)
Plotting multiple traces and styling the chart with custom colors and titles is simple with Plotly syntax.
# Every trace shares the same category axis.
school_names = df.School

# One bar trace per column, each with its own legend name and colour.
trace_women = Bar(x=school_names, y=df.Women, name='Women',
                  marker={'color': '#ffcdd2'})
trace_men = Bar(x=school_names, y=df.Men, name='Men',
                marker={'color': '#A2D5F2'})
trace_gap = Bar(x=school_names, y=df.Gap, name='Gap',
                marker={'color': '#59606D'})
data = [trace_women, trace_men, trace_gap]

# Chart title and axis labels.
layout = Layout(
    title="Average Earnings for Graduates",
    xaxis={'title': 'School'},
    yaxis={'title': 'Salary (in thousands)'},
)

fig = Figure(data=data, layout=layout)
plotly.offline.iplot(fig)
Now we have interactive charts displayed in our notebook. Hover on the chart to see the values for each bar, click and drag to zoom into a specific section or click on the legend to hide/show a trace.
import numpy as np

# Both parameters are sampled on the same number of evenly spaced points.
sample_count = 240
s = np.linspace(0, 2 * np.pi, sample_count)
t = np.linspace(0, np.pi, sample_count)
tGrid, sGrid = np.meshgrid(s, t)

# Radius modulated by both parameters: r = 2 + sin(7s + 5t)
r = 2 + np.sin(7 * sGrid + 5 * tGrid)

# Cartesian coordinates of the surface:
#   x = r*cos(s)*sin(t),  y = r*sin(s)*sin(t),  z = r*cos(t)
sin_t = np.sin(tGrid)
x = r * np.cos(sGrid) * sin_t
y = r * np.sin(sGrid) * sin_t
z = r * np.cos(tGrid)
# Style shared by the three scene axes: white grid and zero lines drawn on a
# light-grey background.
axis_style = dict(
    gridcolor='rgb(255, 255, 255)',
    zerolinecolor='rgb(255, 255, 255)',
    showbackground=True,
    backgroundcolor='rgb(230, 230,230)',
)

surface = Surface(x=x, y=y, z=z)

# A plain list and plain dicts are used instead of the deprecated
# Data / Scene / XAxis / YAxis / ZAxis wrapper classes.
data = [surface]
layout = Layout(
    title='Parametric Plot',
    scene=dict(
        # Each axis gets its own copy so the three settings stay independent.
        xaxis=dict(axis_style),
        yaxis=dict(axis_style),
        zaxis=dict(axis_style),
    ),
)

#Export Static Image Offline using image ='png'
fig = Figure(data=data, layout=layout)
plotly.offline.iplot(fig, image='png')
# Frequencies the slider sweeps over, and the sample points shared by every
# sine curve (computed once instead of twice per trace).
frequencies = np.arange(0, 5, 0.1)
x_values = np.arange(0, 10, 0.01)

# One sine trace per frequency, all hidden at first.
data = [dict(
    visible=False,
    line=dict(color='#00CED1', width=6),  # hex colours need the leading '#'
    name='𝜈 = ' + str(step),
    x=x_values,
    y=np.sin(step * x_values)) for step in frequencies]

# Index of the trace that is shown before the slider is moved.
initial_index = 10
data[initial_index]['visible'] = True

# One slider step per trace: step i restyles 'visible' so that only the
# i'th trace is shown.
trace_count = len(data)
steps = [
    dict(
        method='restyle',
        args=['visible', [j == i for j in range(trace_count)]],
    )
    for i in range(trace_count)
]

sliders = [dict(
    active=initial_index,
    currentvalue={"prefix": "Frequency: "},
    pad={"t": 50},
    steps=steps,
)]

layout = dict(sliders=sliders)
fig = dict(data=data, layout=layout)
# Display the figure currently held in `fig`.
plotly.offline.iplot(fig)
We've seen how to embed Plotly tables and charts as iframes in the notebook. With IPython.display we can embed additional features, such as videos. For example, from YouTube:
from IPython.display import YouTubeVideo
import plotly.tools as tls

# Embed a YouTube video by its id.
video_id = "XUNaGFa9xCM"
YouTubeVideo(video_id)

# Embed a chart by its URL.
chart_url = 'https://plot.ly/~cufflinks/8'
tls.embed(chart_url)
# Configure cufflinks: charts are private, use the 'pearl' theme, and work offline.
cf.set_config_file(sharing='private', theme='pearl', offline=True)

# Generate sample line data and print it.
df = cf.datagen.lines()
print(df)

# Build one trace per column by hand and plot them with plotly directly...
line_traces = [dict(x=df.index, y=df[col], name=col) for col in df.columns]
plotly.offline.iplot(line_traces, filename='cufflinks/simple-line')

# ...then plot the same frame through cufflinks in one call.
df.iplot(kind='scatter')

# Scatter matrix of four random normal columns.
df = pd.DataFrame(np.random.randn(1000, 4), columns=list('abcd'))
df.scatter_matrix(world_readable=False)
Charts created with cufflinks are synced with your online Plotly account. You'll need to configure your credentials to get started. cufflinks can also be configured to work offline in IPython notebooks with Plotly Offline. To get started with Plotly Offline, download a trial library and run cf.go_offline().
# Histogram of column 'a' only.
df['a'].iplot(kind='histogram', world_readable=False)

# Cumulative sums of two random normal columns, 'A' and 'B'.
random_steps = pd.DataFrame(np.random.randn(1000, 2), columns=['A', 'B'])
df = random_steps.cumsum()

# Plot both columns against the index, then 'B' against 'A'.
df.iplot(filename='cufflinks/line-example')
df.iplot(x='A', y='B', filename='cufflinks/x-vs-y-line-example')
Download some civic data: a time-series log of 311 complaints in NYC.
# Read the complaints CSV, using column 1 as the index and parsing it as dates.
complaints_url = 'https://raw.githubusercontent.com/plotly/widgets/master/ipython-examples/311_150k.csv'
df = pd.read_csv(complaints_url, parse_dates=True, index_col=1)
df.head(3)

# Count the occurrences of each complaint type and keep the first 20 counts.
complaint_counts = df['Complaint Type'].value_counts()
series = complaint_counts.iloc[:20]
series.head(3)

series.iplot(
    kind='bar',
    yTitle='Number of Complaints',
    title='NYC 311 Complaints',
    filename='cufflinks/categorical-bar-chart',
)
# Random 10x4 frame with columns 'A' to 'D'.
df = pd.DataFrame(np.random.rand(10, 4), columns=['A', 'B', 'C', 'D'])
# Select the row labelled 5. `.ix` was removed in pandas 1.0; `.loc` is the
# label-based accessor that replaces it.
row = df.loc[5]
# Plot the selected row as a bar chart.
row.iplot(kind='bar')
Call iplot(kind='bar') on a dataframe to produce a grouped bar chart
# Bar chart of the whole dataframe.
df.iplot(kind='bar', filename='cufflinks/grouped-bar-chart')
# Same data with barmode='stack'.
df.iplot(kind='bar', barmode='stack')
Remember: plotly charts are interactive. Click on the legend entries to hide-and-show traces, click-and-drag to zoom, double-click to autoscale, shift-click to drag
# Stacked chart with kind='barh' and a bargap of 0.1.
df.iplot(kind='barh',barmode='stack', bargap=.1)
cufflinks ships with a few themes. View available themes with cf.getThemes, apply them with cf.set_config_file
# List the available themes, then switch to 'polar'.
cf.getThemes()
cf.set_config_file(theme='polar')

# Three random normal samples, offset by +1, 0 and -1.
sample_size = 1000
shifted_up = np.random.randn(sample_size) + 1
centred = np.random.randn(sample_size)
shifted_down = np.random.randn(sample_size) - 1
df = pd.DataFrame({'a': shifted_up, 'b': centred, 'c': shifted_down})

# Histogram with default options, then stacked with 100 bins and
# histnorm='probability', then one subplot per column.
df.iplot(kind='histogram')
df.iplot(kind='histogram', barmode='stack', bins=100, histnorm='probability')
df.iplot(kind='histogram', subplots=True, shape=(3, 1))

# Box plot of five random uniform columns.
df = pd.DataFrame(np.random.rand(10, 5), columns=list('ABCDE'))
df.iplot(kind='box')
To produce a stacked area plot, each column must contain either all positive or all negative values. When input data contains NaN, it will be automatically filled with 0. If you want to drop NaN values or fill them with a different value, use dataframe.dropna() or dataframe.fillna() before calling plot.
# Area chart of the dataframe with fill enabled.
df.iplot(kind='area', fill=True)
# Default chart kind with fill enabled.
df.iplot(fill=True)
Set x and y as column names. If x isn't supplied, df.index will be used.
def _gdp_life_layout():
    """Return a fresh layout dict: log-scaled GDP x-axis, life-expectancy y-axis."""
    return {
        'xaxis': {'title': 'GDP per Capita', 'type': 'log'},
        'yaxis': {'title': "Life Expectancy"}
    }


def _year_trace(frame, year):
    """Return a markers trace of lifeExp against gdpPercap for one year."""
    year_rows = frame[frame['year'] == year]
    return {
        'x': year_rows['gdpPercap'],
        'y': year_rows['lifeExp'],
        'name': year, 'mode': 'markers',
    }


# Tab-separated gapminder data; keep the 2007 and 1952 rows as separate frames.
gapminder_url = 'http://www.stat.ubc.ca/~jenny/notOcto/STAT545A/examples/gapminder/data/gapminderDataFiveYear.txt'
df = pd.read_csv(gapminder_url, sep='\t')
df2007 = df[df['year'] == 2007]
df1952 = df[df['year'] == 1952]

# Cufflinks one-liner: 2007 life expectancy against GDP per capita.
df2007.iplot(kind='scatter', mode='markers', x='gdpPercap', y='lifeExp', filename='cufflinks/simple-scatter')

# Hand-built figure comparing 2007 with 1952, with country names as hover text.
trace_2007 = {'x': df2007.gdpPercap, 'y': df2007.lifeExp, 'text': df2007.country, 'mode': 'markers', 'name': '2007'}
trace_1952 = {'x': df1952.gdpPercap, 'y': df1952.lifeExp, 'text': df1952.country, 'mode': 'markers', 'name': '1952'}
fig = {
    'data': [trace_2007, trace_1952],
    'layout': _gdp_life_layout(),
}
plotly.offline.iplot(fig, filename='cufflinks/multiple-scatter')

# One trace per selected year, built from the full frame.
grouped_figure = {
    'data': [_year_trace(df, year) for year in [1952, 1982, 2007]],
    'layout': _gdp_life_layout(),
}
plotly.offline.iplot(grouped_figure, filename='cufflinks/scatter-group-by')
Add size to create a bubble chart. Add hover text with the text attribute.
# Bubble chart options: marker size comes from 'pop', hover text from 'country'.
bubble_options = dict(
    kind='bubble',
    x='gdpPercap',
    y='lifeExp',
    size='pop',
    text='country',
    xTitle='GDP per Capita',
    yTitle='Life Expectancy',
    filename='cufflinks/simple-bubble-chart',
)
df2007.iplot(**bubble_options)
subplots=True partitions columns into separate subplots. Specify rows and columns with shape=(rows, cols) and share axes with shared_xaxes=True and shared_yaxes=True.
# Generate sample line data and stack one filled subplot per row,
# sharing the x-axis.
df = cf.datagen.lines(4)
df.iplot(
    subplots=True,
    shape=(4, 1),
    shared_xaxes=True,
    fill=True,
    filename='cufflinks/simple-subplots',
)
Add subplot titles with subplot_titles as a list of titles or True to use column names.
# Subplots titled with the column names, legend hidden.
df.iplot(subplots=True, subplot_titles=True, legend=False)

# Scatter matrix of the same frame.
df.scatter_matrix(filename='cufflinks/scatter-matrix-subplot', world_readable=True)

# Generated 20x20 heatmap data drawn with the 'spectral' colour scale.
heatmap_frame = cf.datagen.heatmap(20, 20)
heatmap_frame.iplot(
    kind='heatmap',
    colorscale='spectral',
    filename='cufflinks/simple-heatmap',
)
Use hline and vline for horizontal and vertical lines.
# Generated line data with columns 'a', 'b' and 'c'.
df = cf.datagen.lines(3, columns=list('abc'))

# Horizontal lines at 2 and 4, plus a vertical line at one date.
horizontal_marks = [2, 4]
vertical_marks = ['2015-02-10']
df.iplot(hline=horizontal_marks, vline=vertical_marks)

# Two horizontal bands, each given as a pair of bounds.
horizontal_bands = [(-1, 1), (2, 5)]
df.iplot(hspan=horizontal_bands, filename='cufflinks/shaded-regions')
Extra parameters can be passed in the form of dictionaries: width, fill, color, fillcolor, opacity.
# A vertical band described by a dictionary: its x-range, colour, fill and opacity.
shaded_window = dict(
    x0='2015-02-15',
    x1='2015-03-15',
    color='rgba(30,30,30,0.3)',
    fill=True,
    opacity=.4,
)
df.iplot(vspan=shaded_window, filename='cufflinks/custom-regions')
cufflinks is designed for simple one-line charting with Pandas and Plotly. Not all of the Plotly chart attributes are directly assignable in the df.iplot call signature. To update attributes of a cufflinks chart that aren't available, first convert it to a figure (asFigure=True), then tweak it, then plot it with plotly.plotly.iplot. Here is an example of a simple plotly figure.
from plotly.graph_objs import *

# Two bar traces over the same x values.
first_trace = Bar(x=[1, 2, 3], y=[3, 1, 5], name='first trace', type='bar')
second_trace = Bar(x=[1, 2, 3], y=[4, 3, 6], name='second trace', type='bar')

# A figure is a dict holding the trace list and a layout.
simple_figure = {
    'data': [first_trace, second_trace],
    'layout': Layout(title='simple example'),
}
plotly.offline.iplot(simple_figure, filename='cufflinks/simple-plotly-example')
cufflinks generates these figures that describe plotly graphs. For example, this graph:
# Plot the dataframe as a scatter chart.
df.iplot(kind='scatter', filename='cufflinks/simple-scatter-example')
# Same call with asFigure=True; the result is kept in `figure` for later editing.
figure = df.iplot(kind='scatter', asFigure=True)
# Print the figure's string form.
# NOTE(review): to_string() may not exist on figure objects in newer plotly releases — confirm against the installed version.
print(figure.to_string())
So, if you want to edit any attribute of a Plotly graph from cufflinks, first convert it to a figure and then edit the figure objects. Let's add a y-axis title, a tick prefix, and new legend names to this example:
# Give the y-axis a title and prefix its tick labels with a dollar sign.
yaxis_overrides = {'title': 'Price', 'tickprefix': '$'}
figure['layout']['yaxis1'].update(yaxis_overrides)

# Rename every trace after its position in the figure.
for position, series_trace in enumerate(figure['data']):
    series_trace['name'] = 'Trace ' + str(position)

plotly.offline.iplot(figure, filename='customized-chart.html')